library(tidyverse)
library(janitor)
library(here)
library(readxl)
library(lubridate)
library(stringr)
Clean up bb_candy_2015
Clean up bb_candy_2016
bb_candy_2016_string_to_lower <- bb_candy_2016_date %>%
mutate(country = str_to_lower(country))
bb_candy_2016_str_replace_test <- bb_candy_2016_string_to_lower %>%
mutate(country = str_replace_all(country, "us", "usa"),
country = str_replace_all(country, "usaa", "usa"),
country = str_replace_all(country, "united states of america", "usa"),
country = str_replace_all(country, "merica", "usa"),
country = str_replace_all(country, "usa!", "usa"),
country = str_replace_all(country, "u.s.", "usa"),
country = str_replace_all(country, "usa usa usa", "usa"),
country = str_replace_all(country, "united sates", "usa"),
country = str_replace_all(country, "units states", "usa"),
country = str_replace_all(country, "usa usa", "usa"),
country = str_replace_all(country, "the yoo ess of aaayyyyyy", "usa"),
country = str_replace_all(country, "murica", "usa"),
country = str_replace_all(country, "the best one - usa", "usa"),
country = str_replace_all(country, "trumpistan", "usa"),
country = str_replace_all(country, "units state", "usa"),
country = str_replace_all(country, "united states", "usa"),
country = str_replace_all(country, "united state", "usa"),
country = str_replace_all(country, "united stetes", "usa"),
country = str_replace_all(country, "usa!!!!!", "usa"),
country = str_replace_all(country, "usaa.", "usa"),
country = str_replace_all(country, "usasa", "usa"),
country = str_replace_all(country, "usa \\((i think but it's an election year so who can really tell\\))", "usa"),
country = str_replace_all(country, "cascadia", "usa"),
country = str_replace_all(country, "NANA.NA", "usa"),
country = str_replace_all(country, "the republic of cascadia", "usa"),
country = str_replace_all(country, "[0-9]", "NA"),
country = str_replace_all(country, "united kingdom", "uk"),
country = str_replace_all(country, "united kindon", "uk"),
country = str_replace_all(country, "united kingdon", "uk"),
country = str_replace_all(country, "united kindom", "uk"),
country = str_replace_all(country, "england", "uk")
)
bb_candy_2016_country_count_test <- bb_candy_2016_str_replace_test %>%
distinct(country) %>%
summarise(n())
bb_candy_2016_country_count_test
distinct_country_remove <- bb_candy_2016_str_replace_test %>%
distinct(country)
view(distinct_country_remove)
LS0tCnRpdGxlOiAiVGFzayA0IC0gQ2xlYW5pbmciCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShqYW5pdG9yKQpsaWJyYXJ5KGhlcmUpCmxpYnJhcnkocmVhZHhsKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeShzdHJpbmdyKQpgYGAKCmBgYHtyfQpiYl9jYW5keV8yMDE1IDwtIHJlYWRfeGxzeCgiZGF0YS9yYXdfZGF0YS9ib2luZy1ib2luZy1jYW5keS0yMDE1Lnhsc3giKQpiYl9jYW5keV8yMDE2IDwtIHJlYWRfeGxzeCgiZGF0YS9yYXdfZGF0YS9ib2luZy1ib2luZy1jYW5keS0yMDE2Lnhsc3giKQpiYl9jYW5keV8yMDE3IDwtIHJlYWRfeGxzeCgiZGF0YS9yYXdfZGF0YS9ib2luZy1ib2luZy1jYW5keS0yMDE3Lnhsc3giKQpgYGAKCmBgYHtyfQpkaW0oYmJfY2FuZHlfMjAxNSkKCnBpdm90XzIwMTVfdGVzdCA8LSBiYl9jYW5keV8yMDE1ICU+JSAKICBwaXZvdF9sb25nZXIoY29scyA9ICgiW0J1dHRlcmZpbmdlcl0iOiJbWW9yayBQZXBwZXJtaW50IFBhdHRpZXNdIiksCiAgICAgICAgICAgICAgIG5hbWVzX3RvID0gInN3ZWV0cyIsCiAgICAgICAgICAgICAgIHZhbHVlc190byA9ICJlbmpveW1lbnQiKQoKcGl2b3Rfc2VsZWN0XzIwMTVfdGVzdCA8LSBwaXZvdF8yMDE1X3Rlc3QgJT4lIAogIHNlbGVjdChUaW1lc3RhbXAsIGBIb3cgb2xkIGFyZSB5b3U/YCwgYEFyZSB5b3UgZ29pbmcgYWN0dWFsbHkgZ29pbmcgdHJpY2sgb3IgdHJlYXRpbmcgeW91cnNlbGY/YCwgc3dlZXRzLCBlbmpveW1lbnQpCmRpbShwaXZvdF8yMDE1X3Rlc3QpCmBgYApgYGB7cn0KcGl2b3Rfc2VsZWN0X2dyb3VwXzIwMTUgPC0gcGl2b3Rfc2VsZWN0XzIwMTVfdGVzdCAlPiUgCiAgZ3JvdXBfYnkoc3dlZXRzKSAlPiUgCiAgc3VtbWFyaXNlKG4oKSkKYGBgCgoKQ2xlYW4gdXAgYmJfY2FuZHlfMjAxNQpgYGB7cn0KYmJfY2FuZHlfMjAxNV9hZ2Vfc3RyX3JlbW92ZSA8LSBiYl9jYW5keV8yMDE1ICU+JSAKICBtdXRhdGUoYEhvdyBvbGQgYXJlIHlvdT9gID0gc3RyX3JlbW92ZShgSG93IG9sZCBhcmUgeW91P2AsICJcXC4uKyIpKQoKYmJfY2FuZHlfMjAxNV9hZ2VfbnVtZXJpYyA8LSBiYl9jYW5keV8yMDE1X2FnZV9zdHJfcmVtb3ZlICU+JSAKICBtdXRhdGUoYEhvdyBvbGQgYXJlIHlvdT9gID0gYXMubnVtZXJpYyhgSG93IG9sZCBhcmUgeW91P2ApKSAlPiUgCiAgcmVuYW1lKCJhZ2UiID0gYEhvdyBvbGQgYXJlIHlvdT9gLAogICAgICAgICAidHJpY2tfb3JfdHJlYXRpbmdfYWxvbmUiID0gYEFyZSB5b3UgZ29pbmcgYWN0dWFsbHkgZ29pbmcgdHJpY2sgb3IgdHJlYXRpbmcgeW91cnNlbGY/YCkKCmJiX2NhbmR5XzIwMTVfY2xlYW5fbmFtZXMgPC0gYmJfY2FuZHlfMjAxNV9hZ2VfbnVtZXJpYyAlPiUgCiAgY2xlYW5fbmFtZXMoKQoKYmJfY2FuZHlfMjAxNV9yZWxvY2F0ZV9uZXR0byA8LSBiYl9jYW5keV8yMDE1X2NsZWFuX25hbWVzICU+JSAKICByZWxvY2F0ZShuZWNjb193YWZlcnMsIC5hZnRlciA9IHBpeHlfc3RpeCkKCmJiX2NhbmR5XzIwMTVfc2VsZWN0IDwtIGJiX2NhbmR5XzIwMTVfcmVsb2NhdGVfbmV0dG8gJT4lIAogIHNlbGVjdCh0aW1lc3RhbXAsIGFnZSwgdHJpY2tfb3JfdHJlYXRpbmdfYWxvbmUsIGJ1dHRlcmZpbmdlcjp5b3JrX3BlcHBlcm1pbnRfcGF0dGllcykKCm5hbWVzKGJiX2NhbmR5XzIwMTVfc2VsZWN0KQoKYmJfY2FuZHlfMjAxNV9vcmRlcl9jb2xzIDwtIGJiX2NhbmR5XzIwMTVfc2VsZWN0ICU+JSAKICBzZWxlY3QodGltZXN0YW1wLCBhZ2UsIHRyaWNrX29yX3RyZWF0aW5nX2Fsb25lLCBzb3J0KGNvbG5hbWVzKC4pKSkKCmJiX2NhbmR5XzIwMTVfZGF0ZSA8LSBiYl9jYW5keV8yMDE1X29yZGVyX2NvbHMgJT4lIAogIG11dGF0ZSh0aW1lc3RhbXAgPSBhcy5EYXRlKHRpbWVzdGFtcCkpICU+JSAKICByZW5hbWUoImRhdGUiID0gInRpbWVzdGFtcCIpCmBgYAoKCkNsZWFuIHVwIGJiX2NhbmR5XzIwMTYKYGBge3J9CiMgQ2hhbmdlIGNvbHVtbiBoZWFkZXJzCgpiYl9jYW5keV8yMDE2X25ld19oZWFkZXJzIDwtIGJiX2NhbmR5XzIwMTYgJT4lIAogIHJlbmFtZSgidHJpY2tfb3JfdHJlYXRpbmdfYWxvbmU/IiA9IGBBcmUgeW91IGdvaW5nIGFjdHVhbGx5IGdvaW5nIHRyaWNrIG9yIHRyZWF0aW5nIHlvdXJzZWxmP2AsCiAgICAgICAgICJnZW5kZXIiID0gYFlvdXIgZ2VuZGVyOmAsCiAgICAgICAgICJhZ2UiID0gYEhvdyBvbGQgYXJlIHlvdT9gLAogICAgICAgICAiY291bnRyeSIgPSBgV2hpY2ggY291bnRyeSBkbyB5b3UgbGl2ZSBpbj9gLAogICAgICAgICAic3RhdGUiID0gYFdoaWNoIHN0YXRlLCBwcm92aW5jZSwgY291bnR5IGRvIHlvdSBsaXZlIGluP2AKICApCgojIENoYW5nZSBhZ2UgdG8gYXMubnVtZXJpYyBhbmQgcmVtb3ZlIGRlY2ltYWwKYmJfY2FuZHlfMjAxNl9hZ2Vfc3RyX3JlbW92ZSA8LSBiYl9jYW5keV8yMDE2X25ld19oZWFkZXJzICU+JSAKICBtdXRhdGUoYWdlID0gc3RyX3JlbW92ZShhZ2UsICJcXC4uKyIpKSAlPiUgCiAgbXV0YXRlKGFnZSA9IGFzLm51bWVyaWMoYWdlKSkKCiMgQ2xlYW4gdGhlIGNvbHVtbiBoZWFkZXJzCgpiYl9jYW5keV8yMDE2X2NsZWFuX25hbWVzIDwtIGJiX2NhbmR5XzIwMTZfYWdlX3N0cl9yZW1vdmUgJT4lIAogIGNsZWFuX25hbWVzKCkKCmJiX2NhbmR5XzIwMTZfc2VsZWN0IDwtIGJiX2NhbmR5XzIwMTZfY2xlYW5fbmFtZXMgJT4lIAogIHNlbGVjdCh0aW1lc3RhbXAsIHRyaWNrX29yX3RyZWF0aW5nX2Fsb25lLCBnZW5kZXIsIGFnZSwgY291bnRyeSwgc3RhdGUsIHgxMDBfZ3JhbmRfYmFyOnlvcmtfcGVwcGVybWludF9wYXR0aWVzKQoKYmJfY2FuZHlfMjAxNl9vcmRlcl9jb2xzIDwtIGJiX2NhbmR5XzIwMTZfc2VsZWN0ICU+JSAKICBzZWxlY3QodGltZXN0YW1wLCB0cmlja19vcl90cmVhdGluZ19hbG9uZSwgZ2VuZGVyLCBhZ2UsIGNvdW50cnksIHN0YXRlLCBzb3J0KGNvbG5hbWVzKC4pKSwgLXBlcnNvbl9vZl9pbnRlcmVzdF9zZWFzb25fM19kdmRfYm94X3NldF9ub3RfaW5jbHVkaW5nX2Rpc2NfNF93aXRoX2hpbGFyaW91c19vdXR0YWtlcykKCmJiX2NhbmR5XzIwMTZfZGF0ZSA8LSBiYl9jYW5keV8yMDE2X29yZGVyX2NvbHMgJT4lIAogIG11dGF0ZSh0aW1lc3RhbXAgPSBhcy5EYXRlKHRpbWVzdGFtcCkpICU+JSAKICByZW5hbWUoImRhdGUiID0gInRpbWVzdGFtcCIpCmBgYAoKYGBge3J9CmJpbmRfcm93c190ZXN0ICA8LSBiaW5kX3Jvd3MoYmJfY2FuZHlfMjAxNV9kYXRlLCBiYl9jYW5keV8yMDE2X2RhdGUpCmBgYAoKYGBge3J9CmJiX2NhbmR5XzIwMTZfc3RyaW5nX3RvX2xvd2VyIDwtIGJiX2NhbmR5XzIwMTZfZGF0ZSAlPiUgCiAgbXV0YXRlKGNvdW50cnkgPSBzdHJfdG9fbG93ZXIoY291bnRyeSkpCgpiYl9jYW5keV8yMDE2X3N0cl9yZXBsYWNlX3Rlc3QgPC0gYmJfY2FuZHlfMjAxNl9zdHJpbmdfdG9fbG93ZXIgJT4lIAogIG11dGF0ZShjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJ1cyIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidXNhYSIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidW5pdGVkIHN0YXRlcyBvZiBhbWVyaWNhIiwgInVzYSIpLAogICAgICAgICBjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJtZXJpY2EiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVzYSEiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInUucy4iLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVzYSB1c2EgdXNhIiwgInVzYSIpLAogICAgICAgICBjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJ1bml0ZWQgc2F0ZXMiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVuaXRzIHN0YXRlcyIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidXNhIHVzYSIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidGhlIHlvbyBlc3Mgb2YgYWFheXl5eXl5IiwgInVzYSIpLAogICAgICAgICBjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJtdXJpY2EiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInRoZSBiZXN0IG9uZSAtIHVzYSIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidHJ1bXBpc3RhbiIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidW5pdHMgc3RhdGUiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVuaXRlZCBzdGF0ZXMiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVuaXRlZCBzdGF0ZSIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidW5pdGVkIHN0ZXRlcyIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidXNhISEhISEiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVzYWEuIiwgInVzYSIpLAogICAgICAgICBjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJ1c2FzYSIsICJ1c2EiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidXNhIFxcKChpIHRoaW5rIGJ1dCBpdCdzIGFuIGVsZWN0aW9uIHllYXIgc28gd2hvIGNhbiByZWFsbHkgdGVsbFxcKSkiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgImNhc2NhZGlhIiwgInVzYSIpLAogICAgICAgICBjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJOQU5BLk5BIiwgInVzYSIpLAogICAgICAgICBjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJ0aGUgcmVwdWJsaWMgb2YgY2FzY2FkaWEiLCAidXNhIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgIlswLTldIiwgIk5BIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVuaXRlZCBraW5nZG9tIiwgInVrIiksCiAgICAgICAgIGNvdW50cnkgPSBzdHJfcmVwbGFjZV9hbGwoY291bnRyeSwgInVuaXRlZCBraW5kb24iLCAidWsiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidW5pdGVkIGtpbmdkb24iLCAidWsiKSwKICAgICAgICAgY291bnRyeSA9IHN0cl9yZXBsYWNlX2FsbChjb3VudHJ5LCAidW5pdGVkIGtpbmRvbSIsICJ1ayIpLAogICAgICAgICBjb3VudHJ5ID0gc3RyX3JlcGxhY2VfYWxsKGNvdW50cnksICJlbmdsYW5kIiwgInVrIikKICAgICAgICAgKQoKCmJiX2NhbmR5XzIwMTZfY291bnRyeV9jb3VudF90ZXN0IDwtIGJiX2NhbmR5XzIwMTZfc3RyX3JlcGxhY2VfdGVzdCAlPiUKICBkaXN0aW5jdChjb3VudHJ5KSAlPiUgCiAgc3VtbWFyaXNlKG4oKSkKYmJfY2FuZHlfMjAxNl9jb3VudHJ5X2NvdW50X3Rlc3QKCmRpc3RpbmN0X2NvdW50cnlfcmVtb3ZlIDwtIGJiX2NhbmR5XzIwMTZfc3RyX3JlcGxhY2VfdGVzdCAlPiUgCiAgZGlzdGluY3QoY291bnRyeSkKdmlldyhkaXN0aW5jdF9jb3VudHJ5X3JlbW92ZSkKYGBgCgo=